library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.3.6 ✔ purrr 0.3.5
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.2.1 ✔ stringr 1.4.1
✔ readr 2.1.3 ✔ forcats 0.5.2 ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Sys.setenv(R_CONFIG_ACTIVE = Sys.info()["nodename"])
print(Sys.info()["nodename"])
nodename
"Benjamins-MacBook-Pro-2.local"
dropbbox_dir <- config::get("dev_analysis_data_dir")
time_points <- readr::read_csv(paste0(dropbbox_dir,"/SST_roi_by_time_point.csv"))
New names:Rows: 425081 Columns: 17── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): subid, go_no_go_condition_label, condition
dbl (14): ...1, tr, offset, harvardoxford-cortical_prob_Frontal Orbital Cortex, harvardoxford-cortical_prob_Cingulate Gyrus, anterior division, harvardoxford-subcortical_prob_Left Caudate, harvardoxford-subcortical_prob_Right Accum...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Now let’s see if we can graph putamen…
library(ggplot2)
# ggplot(time_points,
# aes(x=offset, y=`harvardoxford-subcortical_prob_Left Putamen`))+
# geom_point()+
# geom_smooth(method="loess",span=2,na.rm=TRUE) + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
# scale_y_continuous(limits=c(-2.5,2.5))+
# facet_wrap(~condition)
we could also subtract the average per TRIAL. Best we subtract the average across all of the offsets. it’s a bit arbitrary to use the average over all the areas; the idea is just to get a consistent amount of average.
roi_cols <- colnames(time_points)[grepl("harvardoxford",colnames(time_points))]
time_points$tr_roi_mean <- rowMeans(time_points[,roi_cols])
time_points<-
time_points %>% group_by(subid,wave) %>%
mutate(run_mean_across_rois = mean(tr_roi_mean, na.rm = TRUE))
#now mean-center the ROIs
time_points_c<-time_points
for (roi_col in roi_cols){
time_points_c[,roi_col]<-time_points_c[,roi_col]-time_points_c$run_mean_across_rois
}
sd(time_points$`harvardoxford-subcortical_prob_Left Putamen`)
[1] 0.4566477
sd(time_points_c$`harvardoxford-subcortical_prob_Left Putamen`)
[1] 0.4565868
library(ggplot2)
ggplot(time_points_c,
aes(x=offset, y=`harvardoxford-subcortical_prob_Left Putamen`,group=interaction(trial_n,subid,wave)))+
geom_line(alpha=0.2)+
#geom_point()+
#geom_smooth(method="loess",span=2,na.rm=TRUE) + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
scale_y_continuous(limits=c(-2.5,2.5))+
facet_wrap(~condition)
NA
Now let’s try just graphing the moving average…
library(tidyverse)
#get the range of offsets
min_offset<-round(min(time_points_c$offset))
max_offset <- round(max(time_points_c$offset))
seq_size <- 0.1
offset_size<-1
offset_times <- seq(min_offset,max_offset-max(offset_size,seq_size),seq_size)
dt_list<-list()
for (cond in unique(time_points_c$condition)){
print(cond)
for (ot_i in offset_times){
#<-offset_times[[20]]
tp_at_offset <- time_points_c %>% filter(offset>=ot_i & offset<(ot_i+offset_size) & condition==cond)
numeric_cols <- colnames(tp_at_offset)[sapply(tp_at_offset,class)=="numeric"]
tp_at_offset_m <- data.frame(t(colMeans(tp_at_offset[,numeric_cols],na.rm=TRUE)))
tp_at_offset_m$offset_start<-ot_i
tp_at_offset_m$condition<-cond
dt_list<-append(dt_list,list(tp_at_offset_m))
if((which(ot_i==offset_times)%%20)==0){
cat(". ")
}
}
print("")
}
[1] "FailedStop"
. . . . . . . . . . . . . . [1] ""
[1] "CorrectGo"
. . . . . . . . . . . . . . [1] ""
[1] "CorrectStop"
. . . . . . . . . . . . . . [1] ""
[1] "FailedGo"
. . . . . . . . . . . . . . [1] ""
df_offset<-data.frame(data.table::rbindlist(dt_list))
behavioral_data_w_tone_data <- readr::read_csv(paste0(dropbbox_dir,"/sst_behavioral_data_all_w_class_type_reveal_onset.csv"))
New names:Rows: 259840 Columns: 26── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): subid, go_no_go_condition_label, condition
dbl (22): ...1, trial_n, numchunks_reciprocal, go_no_go_condition, arrow_presented, ladder_number, LadderX_SSD_ms, subject_response, ladder_movement, reaction_time, SSD_technical, SSD_obser...
dttm (1): rundatetime
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# I want to know: what is the time delay from the tone sound to the PERIOD of the next trial?
#so I want to match each trial up with the _next_ trial
#grab the set of next trials-we want their onset and their duration
#then match that back onto n+1 of their prior trial
trial_df <- behavioral_data_w_tone_data %>% filter(condition!="Cue") %>%
arrange(trial_n) %>%
group_by(subid,waveid,runid) %>%
mutate(next_trial_n = lead(trial_n,1)) %>%
ungroup()
next_trial_df <- behavioral_data_w_tone_data %>%
filter(condition!="Cue") %>%
select(trial_n,condition,onset,trial_duration,subid,waveid,runid)
trials_with_next<-merge(
trial_df,next_trial_df,
by.x = c("subid","waveid","runid", "next_trial_n"),
by.y=c("subid","waveid","runid","trial_n"),
suffixes = c("","_next"))
now, mark for each trial the start and end time point of the next trial, and get measures of how long after each trial the next one appears
trials_with_next$trial_end_next <- trials_with_next$onset_next+trials_with_next$trial_duration_next
trials_with_next$class_type_reveal_onset_to_next_trial_start<-trials_with_next$onset_next-trials_with_next$class_type_reveal_onset
trials_with_next$class_type_reveal_onset_to_next_trial_end<-trials_with_next$trial_end_next-trials_with_next$class_type_reveal_onset
hist(trials_with_next$class_type_reveal_onset_to_next_trial_start,breaks = 100)
median(trials_with_next$class_type_reveal_onset_to_next_trial_start)
[1] 2.797249
hist(trials_with_next$class_type_reveal_onset_to_next_trial_end,breaks = 100)
median(trials_with_next$class_type_reveal_onset_to_next_trial_end)
[1] 4.808165
hist(trials_with_next$class_type_reveal_onset_to_next_trial_start)
next_trial_median <- data.frame(
"next_trial_start_median" = median(trials_with_next$class_type_reveal_onset_to_next_trial_start),
"next_trial_end_median" = median(trials_with_next$class_type_reveal_onset_to_next_trial_end)
)
quantiles <- (1:10)/10
# next_trial_gradient <- data.frame(
# "quantiles" = quantiles,
# "next_trial_start" = quantile(trials_with_next$class_type_reveal_onset_to_next_trial_start,(1:10)/10),
# "next_trial_end" = quantile(trials_with_next$class_type_reveal_onset_to_next_trial_end,(1:10)/10)
# )
gradient_range = seq(floor(min(trials_with_next$class_type_reveal_onset_to_next_trial_start)),ceiling(max(trials_with_next$class_type_reveal_onset_to_next_trial_end)),0.1)
in_range_by_gradient <- sapply(gradient_range,function(tp){
time_point_is_after_start <- tp > trials_with_next$class_type_reveal_onset_to_next_trial_start
time_point_is_before_end <- tp < trials_with_next$class_type_reveal_onset_to_next_trial_end
number_of_trials <- length(trials_with_next$class_type_reveal_onset_to_next_trial_start)
return(sum(time_point_is_after_start & time_point_is_before_end)/number_of_trials)
})
next_trial_gradient <- data.frame("tp"=gradient_range,"pct_in_range"=in_range_by_gradient)
now by ROI with facets for condition:
# roi_cols <- colnames(df_offset)[grepl("harvardoxford",colnames(df_offset))]
df_offset_long<-df_offset %>% pivot_longer(cols=roi_cols,names_to="ROI",values_to="value")
unique_rois<-sort(unique(df_offset_long$ROI))
unique_rois_ordered <- unique_rois[order(unique_rois %>% str_extract("(?<=.{3,5}\\.)([A-Za-z]*)$"))]
df_offset_long$ROI<-factor(df_offset_long$ROI,levels = unique_rois_ordered, ordered=TRUE)
ggplot(df_offset_long,
aes(x=offset_start, y=value,color=ROI))+
facet_wrap(~condition)+
#geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent")+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
geom_line(size=1,alpha=0.5)+
scale_color_viridis_d(option = "C")+
coord_cartesian(ylim=c(-0.1,0.25))+
ggplot_time_points_extras
#geom_point()+
#geom_smooth(method="loess",span=2,na.rm=TRUE) + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
#scale_y_continuous(limits=c(-2.5,2.5))
Let’s now try to contrast these time series to see what the CS-FS looks like. This will be interesting because it’s similar to the simple GLM result I got earlier that set me down the path of trying to analyze this.
trial_tps_compare_conditions<-df_offset_long %>% pivot_wider(id_cols=c(ROI,offset_start),names_from = "condition",values_from="value")
trial_tps_compare_conditions$CS_minus_FS<-trial_tps_compare_conditions$CorrectStop-trial_tps_compare_conditions$FailedStop
next_trial_gradient$hrf_tp<-next_trial_gradient$tp+5
ggplot(trial_tps_compare_conditions,
aes(x=offset_start, y=CS_minus_FS,color=ROI))+
#geom_tile(data = next_trial_gradient, aes(x=tp,alpha=pct_in_range,y=0),fill="#000000",color="transparent")+
geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent")+
#annotate("text",x=4,y=0.25,label="Subsequent Go Trial\nTime Range",size=2)+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
scale_alpha_continuous(name="Subsequent go trial range", range=c(0,0.7),labels=scales::percent_format())+
geom_line(size=1,alpha=0.5)+
scale_color_viridis_d(option = "C")+
ggplot_time_points_extras+
coord_cartesian(ylim=c(-0.1,0.25))+
labs(y="CS-FS BOLD activity difference")
NA
Contrast: - FS and CS - Caudate and Putamen - Individual differences in behavioral responsivity to Failed vs correct (in terms of speed-ups/slow-downs), and if this is linked to individual differences in caudate activity - or equally–we could do this at a trial level.
Easy way first. Take just CS or FS, then do one graph per ROI, use color with the order of the task.
roi_cols <- colnames(time_points_c)[grepl("harvardoxford",colnames(time_points_c))]
next_trial_gradient$hrf_tp<-next_trial_gradient$tp+5
time_points_long<-time_points_c %>% pivot_longer(cols=roi_cols,names_to="ROI",values_to="value")
unique_rois<-sort(unique(time_points_long$ROI))
unique_rois_ordered <- unique_rois[order(unique_rois %>% str_extract("(?<=.{3,5}\\.)([A-Za-z]*)$"))]
time_points_long$ROI<-factor(time_points_long$ROI,levels = unique_rois_ordered, ordered=TRUE)
for (c_i in unique(time_points_long$condition)){
c_plot <- ggplot(time_points_long %>% filter(condition==c_i),
aes(x=offset, y=value,color=trial_n))+
facet_wrap(~ROI)+
geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent")+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
geom_line(size=1,alpha=0.1)+
#scale_color_viridis_d(option = "C")+
#coord_cartesian(ylim=c(-0.1,0.25))+
ggplot_time_points_extras
print(c_plot)
}
That doesn’t work. OK, let’s try putting the activity into buckets of trials (early, mid, late) then graph for those three groups
library(tidyverse)
#get the range of offsets
min_offset<-round(min(time_points_c$offset))
max_offset <- round(max(time_points_c$offset))
seq_size <- 0.1
offset_size<-1
offset_times <- seq(min_offset,max_offset-max(offset_size,seq_size),seq_size)
dt_list<-list()
time_points_c$trial_bucket<-""
time_points_c$trial_bucket[time_points_c$trial_n<=40]<-"early"
time_points_c$trial_bucket[time_points_c$trial_n%in% seq(139-40,139,2)]<-"mid"
time_points_c$trial_bucket[time_points_c$trial_n %in% seq(239-40,239,2)]<-"late"
for (cond in unique(time_points_c$condition)){
print(cond)
for (tb_i in unique(time_points_c$trial_bucket)){
for (ot_i in offset_times){
tp_at_offset <- time_points_c %>% filter(offset>=ot_i & offset<(ot_i+offset_size) & condition==cond & trial_bucket==tb_i)
numeric_cols <- colnames(tp_at_offset)[sapply(tp_at_offset,class)=="numeric"]
tp_at_offset_m <- data.frame(t(colMeans(tp_at_offset[,numeric_cols],na.rm=TRUE)))
tp_at_offset_m$offset_start<-ot_i
tp_at_offset_m$condition<-cond
tp_at_offset_m$trial_bucket<-tb_i
dt_list<-append(dt_list,list(tp_at_offset_m))
if((which(ot_i==offset_times)%%20)==0){
cat(". ")
}
}
}
}
[1] "FailedStop"
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [1] "CorrectGo"
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [1] "CorrectStop"
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [1] "FailedGo"
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
df_offset_run_buckets<-data.frame(data.table::rbindlist(dt_list))
roi_cols <- colnames(df_offset_run_buckets)[grepl("harvardoxford",colnames(df_offset_run_buckets))]
df_offset_long<-df_offset_run_buckets %>% pivot_longer(cols=roi_cols,names_to="ROI",values_to="value")
df_offset_long <- df_offset_long[!is.nan(df_offset_long$tr), ]
df_offset_long$trial_bucket<-factor(df_offset_long$trial_bucket, levels=c("early","mid","late"))
df_offset_long$ROI_label<-df_offset_long$ROI %>% as.character %>% stringr::str_extract_all(pattern = "(?<=harvardoxford\\..{0,3}cortical_prob_)(.*)") %>% unlist
unique_rois<-sort(unique(df_offset_long$ROI_label))
unique_rois_ordered <- unique_rois[order(unique_rois %>% str_extract("(?<=.{3,5}\\.)([A-Za-z]*)$"))]
df_offset_long$ROI_label<-factor(df_offset_long$ROI_label,levels = unique_rois_ordered, ordered=TRUE)
for (c_i in unique(df_offset_long$condition)){
c_plot <- ggplot(df_offset_long %>% filter(condition==c_i),#filter(condition==c_i),
aes(x=offset_start, y=value,color=trial_bucket,group=trial_bucket))+
facet_wrap(~ROI_label,ncol = 4)+
#geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent")+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
geom_line(size=1,alpha=0.5)+
#scale_color_viridis_d(option = "C")+
#coord_cartesian(ylim=c(-0.1,0.25))+
ggplot_time_points_extras+
labs(
title="Stop Signal Task Activity",
subtitle=paste0(c_i)
)
#geom_point()+
#geom_smooth(method="loess",span=2,na.rm=TRUE) + #https://ggplot2.tidyverse.org/reference/geom_smooth.html
#scale_y_continuous(limits=c(-2.5,2.5))
print(c_plot)
}
next_trial_gradient$hrf_tp<-next_trial_gradient$tp+5
for (c_i in unique(df_offset_long$condition)){
c_plot <- ggplot(df_offset_long %>% filter(condition==c_i & trial_bucket!=""),
aes(x=offset_start, y=value,color=ROI_label))+
facet_wrap(~trial_bucket,nrow = 1)+
geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent",show.legend=FALSE)+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
geom_line(size=1,alpha=0.5)+
scale_color_viridis_d(option = "C")+
coord_cartesian(ylim=c(-0.5,0.5))+
ggplot_time_points_extras+
labs(
title="Stop Signal Task Activity",
subtitle=paste0(c_i)
)+theme(legend.position = "bottom")
print(c_plot)
}
Can we contrast CS and FS in the way we did above?
trial_tps_compare_conditions<-df_offset_long %>% pivot_wider(id_cols=c(ROI_label,offset_start,trial_bucket),names_from = "condition",values_from="value")
trial_tps_compare_conditions$CS_minus_FS<-trial_tps_compare_conditions$CorrectStop-trial_tps_compare_conditions$FailedStop
c_plot <- ggplot(trial_tps_compare_conditions %>% filter(trial_bucket!=""),
aes(x=offset_start, y=CS_minus_FS,color=ROI_label))+
facet_wrap(~trial_bucket,nrow = 1)+
geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent",show.legend=FALSE)+
annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
geom_line(size=1,alpha=0.5)+
scale_color_viridis_d(option = "C")+
coord_cartesian(ylim=c(-0.5,0.5))+
ggplot_time_points_extras+
labs(
title="Stop Signal Task Activity",
subtitle=paste0(c_i),
y="CS-FS BOLD activity difference")+
theme(legend.position = "bottom")
print(c_plot)
#
# ggplot(trial_tps_compare_conditions,
# aes(x=offset_start, y=CS_minus_FS,color=ROI))+
# #geom_tile(data = next_trial_gradient, aes(x=tp,alpha=pct_in_range,y=0),fill="#000000",color="transparent")+
# geom_tile(data = next_trial_gradient, aes(x=hrf_tp,alpha=pct_in_range,y=0),fill="#77ff77",color="transparent")+
# #annotate("text",x=4,y=0.25,label="Subsequent Go Trial\nTime Range",size=2)+
# annotate("text",x=10,y=0.22,label="Subsequent Go Trial\nExpected HRF",size=2)+
# scale_alpha_continuous(name="Subsequent go trial range", range=c(0,0.7),labels=scales::percent_format())+
# geom_line(size=1,alpha=0.5)+
# scale_color_viridis_d(option = "C")+
# ggplot_time_points_extras+
# coord_cartesian(ylim=c(-0.1,0.25))+
# labs(y="CS-FS BOLD activity difference")
There are a different distribution of the number of trials for each condition, but not in a way that clearly could produce the discrepancy I’m observing above.
time_points_c %>% filter(trial_bucket!="") %>% group_by(condition,trial_bucket) %>% summarize(trial_count=n())